---
title: "Motor Vehicle Accidents in Victoria"
output:
flexdashboard::flex_dashboard:
vertical_layout: scroll
orientation: rows
source_code: embed
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE,
eval = TRUE,
message = FALSE,
warning = FALSE)
```
```{r libraries}
library(flexdashboard)
library(tidyverse)
library(lubridate)
library(janitor)
library(plotly)
library(ggResidpanel)
library(broom)
library(knitr)
library(kableExtra)
library(ggmap)
library(ggthemes)
```
```{r load-data}
accidents <- read_csv("data/ACCIDENT.csv") %>%
clean_names()
locations <- read_csv("data/ACCIDENT_LOCATION.csv") %>%
clean_names()
nodes <- read_csv("data/NODE.csv") %>%
clean_names()
persons <- read_csv("data/PERSON.csv") %>%
clean_names()
vehicles <- read_csv("data/VEHICLE.csv") %>%
clean_names()
```
```{r create-year-hour-day}
accidents <- accidents %>%
mutate(accidentdate = dmy(accidentdate),
Year = year(accidentdate),
Hour = hour(accidenttime),
Weekday = wday(accidentdate,
label = TRUE,
abbr = FALSE))
```
Part 1 {data-icon="fa-battery-quarter"}
=====================================
Row {data-width=600}
--------------------------------
### Accidents per year
```{r chen1}
accidents_per_year <- accidents %>%
count(Year) %>%
ggplot(aes(x = Year,
y = n)) +
geom_line() +
xlab("Year(2006 ~ 2020)") +
ylab("Number of Car Accidents") +
geom_point()
ggplotly( accidents_per_year)
```
### Accidents by weekday
```{r chen2}
accidents %>%
count(Weekday,
name = "Accidents") %>%
ggplot(aes(x = Weekday,
y = Accidents)) +
geom_bar(stat = "identity",
fill = "#999999") +
ylab("Number of Car Accidents") +
geom_text(aes(label = Accidents),
vjust = -1,
color = "black",
size = 3)
```
Row {data-width=600}
--------------------------------
### Accidents by hour
```{r chen3}
accidents_by_hour <- accidents %>%
count(Hour,
name = "Accidents")
p3 <- accidents_by_hour %>%
ggplot(aes(x = Hour,
y = Accidents)) +
geom_line() +
xlab("Time") +
ylab("Number of Car Accidents") +
geom_point()
ggplotly(p3)
```
### Deaths by hour
```{r deaths-by-hour}
deaths_by_hour <- accidents %>%
group_by(Hour) %>%
tally(no_persons_killed,
name = "Deaths")
```
```{r deaths-per-accident-by-hour}
deaths_per_accident_by_hour <- accidents_by_hour %>%
left_join(deaths_by_hour) %>%
mutate(Deaths_per_accident = round(Deaths/Accidents, digit = 4))
```
```{r chen4}
p4 <- deaths_per_accident_by_hour %>%
ggplot(aes(x = Hour,
y = Deaths_per_accident)) +
geom_line() +
xlab("Time") +
ylab("Death Rate of Car Accidents") +
geom_point()
ggplotly(p4)
```
Part 2 {data-icon="fa-battery-half"}
=====================================
Row {.tabset data-height=500}
------------
### **Deaths by speed zone**
```{r accidents-by-speed-zone}
accidents_by_speed_zone <- accidents %>%
count(speed_zone,
name = "Accidents")
```
```{r deaths-by-speed-zone}
deaths_by_speed_zone <- accidents %>%
group_by(speed_zone) %>%
tally(no_persons_killed,
name = "Deaths")
```
```{r deaths-per-accident-by-speed-zone}
deaths_by_accident <- accidents_by_speed_zone %>%
left_join(deaths_by_speed_zone) %>%
mutate(Deaths_by_accident = Deaths/Accidents)
```
```{r deaths-per-accident-plot}
deaths_by_accident %>%
mutate(speed_zone = as.numeric(speed_zone)) %>%
filter(speed_zone %in% seq(30, 110, 10)) %>%
ggplot(aes(y = Deaths_by_accident,
x = speed_zone)) +
geom_line() +
labs(x = "Speed Zone",
y = "Deaths by Accident")
ggplotly()
```
Row {.tabset data-height=500}
------------
### **Death rate by year of vehicle manufacture**
```{r join-person-and-vehicle}
person_vehicle <- persons %>%
left_join(vehicles)
```
```{r total-people-involved-in-accidents-per-manufature-year}
person_vehicle_total <- person_vehicle %>%
group_by(vehicle_year_manuf) %>%
tally(name = "Persons",
sort = TRUE)
person_vehicle_deaths <- person_vehicle %>%
filter(inj_level_desc == "Fatality") %>%
group_by(vehicle_year_manuf) %>%
tally(name = "Fatalities",
sort = TRUE)
```
```{r join-total-and-deaths}
death_rate_by_year_manuf <- person_vehicle_total %>%
left_join(person_vehicle_deaths) %>%
mutate(death_rate = Fatalities/Persons) %>%
arrange(desc(vehicle_year_manuf))
```
```{r plot-death-rate-by-year-manuf}
manuf_year_death_rate <- death_rate_by_year_manuf %>%
filter(vehicle_year_manuf >= 1985 & vehicle_year_manuf < 3001)
p1 <- manuf_year_death_rate %>%
ggplot(aes(x = vehicle_year_manuf,
y = death_rate)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Year Manufactured",
y = "Death Rate")
ggplotly(p1)
```
### **Regression model**
```{r regression-model}
manuf_year_death_rate_lm <- lm(death_rate ~ vehicle_year_manuf, data = manuf_year_death_rate)
resid_panel(manuf_year_death_rate_lm, plot = "all")
```
### **Goodness of fit**
```{r}
tidy(manuf_year_death_rate_lm) %>%
kable() %>%
kable_styling(bootstrap_options = "striped")
glance(manuf_year_death_rate_lm) %>%
kable() %>%
kable_styling(bootstrap_options = "striped")
```
Column {.sidebar data-width=350}
----
***
> **Findings**
1. Accidents become increasingly serious the faster the speed at which they occur. The risk of dying in an accident is nearly 13 times higher in a 110km/h zone (0.064 deaths/accident) than in a 40km/h zone (0.005 deaths/accident).
2. A person's risk of dying in an accident is positively correlated with the age of their vehicle; the newer the make, the less likely it is that a person will be killed in an accident. For every year older a vehicle is, the risk of dying if you have an accident in it increases by 0.0002 deaths per accident. The reason for this is improved safety standards for vehicles.
***
Part 3 {data-icon="fa-battery-three-quarters"}
=====================================
Row {.tabset data-height=800}
---------
### **Accidents Map**
```{r accidents-map}
location_box <- c(min(nodes$long),
min(nodes$lat),
max(nodes$long),
max(nodes$lat))
location_map <- get_map(location = location_box,
source = 'osm')
ggmap(location_map) +
geom_point(data = nodes,
aes(x = long,
y = lat),
colour="#0072B2",
alpha=0.5,
size = 0.05) +
theme_map()
```
### **Roads with Most Accidents and Highest Death Rates**
```{r join-road-names}
roads_accidents <- accidents %>%
left_join(locations) %>%
mutate(Road = paste(road_name,
road_type))
```
```{r accidents-by-road}
accidents_per_road <- roads_accidents %>%
count(Road,
name = "Accidents"
,sort = T)
deaths_per_road <- roads_accidents %>%
group_by(Road) %>%
tally(no_persons_killed,
name = "Deaths",
sort = TRUE)
accidents_per_road %>%
head(8) %>%
kable(caption = "Accidents by road") %>%
kable_styling(bootstrap_options = "striped")
```
```{r deadliest-road}
deadliest_road <- accidents_per_road %>%
left_join(deaths_per_road) %>%
filter(Accidents >= 100) %>%
mutate(Deaths_per_accident = Deaths/Accidents) %>%
arrange(desc(Deaths_per_accident))
deadliest_road %>%
head(8) %>%
kable(caption = "Deadliest roads") %>%
kable_styling(bootstrap_options = "striped")
```
### **Accidents by Gender**
```{r accidents-gender-table}
persons <- persons %>%
mutate(sex = recode(sex,
"F" = "Female",
"M" = "Male",
"U" = "Unknown"))
```
```{r accidents-gender-plot}
persons %>%
filter(road_user_type_desc == "Drivers",
sex %in% c("Female",
"Male"),
age > 15) %>%
count(age,
sex,
name = "Accidents") %>%
ggplot(aes(x = age,
y = Accidents,
color = sex)) +
geom_line() +
xlab("Age")
ggplotly()
```
### **Death Rate by User Type**
```{r user-death-rate}
user_type <- persons %>%
count(road_user_type_desc,
name = "User")
user_type_fatal <- persons %>%
filter(inj_level_desc == "Fatality") %>%
count(road_user_type_desc,
name = "Fatal")
user_type %>%
left_join(user_type_fatal) %>%
mutate(death_rate = Fatal / User) %>%
ggplot(aes(x = fct_reorder(road_user_type_desc,
death_rate),
y = death_rate)) +
geom_col(fill = "#999999") +
labs(x = "User Type",
y = "Death Rate") +
theme(axis.text.x = element_text(angle = 45,
hjust = 1))
```
Column {.sidebar data-width=350}
----
***
> **Findings**
1. The accidents are concentrated around Melbourne and branch out as we get further away. Small concentrations are also present around other cities and towns
2. Regional highways are the deadliest types of roads
3. Males commit more accidents than females. Both commit the highest number of accidents as new drivers at a young age, the accidents steadily decrease as the age increases
4. Pedestrians are the biggest casualty of accidents with the highest number of fatalities. Bicyclists are the least at risk of death
***